{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "name": "100ML_Day1.ipynb",
      "provenance": [],
      "authorship_tag": "ABX9TyPLTiPqo3dUYMEsOxHrA/bL"
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    }
  },
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "H82Z29m6Rmak"
      },
      "source": [
        "#Day 1 of ML CODE - Data PreProcessing. \r\n",
        "\r\n",
        "> Feature Encoding. \r\n",
        "\r\n",
        "> Missing Data Handling"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "yambPC4jOOoF"
      },
      "source": [
        "import numpy as np\r\n",
        "import pandas as pd\r\n",
        "import seaborn as sns\r\n",
        "import matplotlib.pyplot as plt"
      ],
      "execution_count": 2,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "msxFy1HUOxZi"
      },
      "source": [
        "df = pd.read_csv('https://raw.githubusercontent.com/Avik-Jain/100-Days-Of-ML-Code/master/datasets/Data.csv')"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 195
        },
        "id": "6gXIcfJMO10g",
        "outputId": "e4671027-d1c3-483c-92ed-e15818ea2f95"
      },
      "source": [
        "df.head()"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/html": [
              "<div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>Country</th>\n",
              "      <th>Age</th>\n",
              "      <th>Salary</th>\n",
              "      <th>Purchased</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>France</td>\n",
              "      <td>44.0</td>\n",
              "      <td>72000.0</td>\n",
              "      <td>No</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>Spain</td>\n",
              "      <td>27.0</td>\n",
              "      <td>48000.0</td>\n",
              "      <td>Yes</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2</th>\n",
              "      <td>Germany</td>\n",
              "      <td>30.0</td>\n",
              "      <td>54000.0</td>\n",
              "      <td>No</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>3</th>\n",
              "      <td>Spain</td>\n",
              "      <td>38.0</td>\n",
              "      <td>61000.0</td>\n",
              "      <td>No</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>4</th>\n",
              "      <td>Germany</td>\n",
              "      <td>40.0</td>\n",
              "      <td>NaN</td>\n",
              "      <td>Yes</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>"
            ],
            "text/plain": [
              "   Country   Age   Salary Purchased\n",
              "0   France  44.0  72000.0        No\n",
              "1    Spain  27.0  48000.0       Yes\n",
              "2  Germany  30.0  54000.0        No\n",
              "3    Spain  38.0  61000.0        No\n",
              "4  Germany  40.0      NaN       Yes"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 3
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "9MIkpfzhO3AA"
      },
      "source": [
        "X = df.iloc[:,:-1]\r\n",
        "y = df.iloc[:,-1]"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "0-wkGqNsPz1V"
      },
      "source": [
        "from sklearn.impute import SimpleImputer"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "-ERO7MkPPVRp"
      },
      "source": [
        "imputer = SimpleImputer(missing_values=np.nan, strategy='mean')"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "vGQJvTB0PWL3"
      },
      "source": [
        "imputer = imputer.fit(X.iloc[:,1:])\r\n",
        "\r\n",
        "imputed_data = imputer.transform(X.iloc[:,1:])"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "l9nXjHt4RZ3_"
      },
      "source": [
        "X.iloc[:,1:] = imputed_data"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 343
        },
        "id": "so6P1JuoSHMb",
        "outputId": "4ef43951-bd05-463c-b3e1-cd23c33f45dd"
      },
      "source": [
        "X"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/html": [
              "<div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>Country</th>\n",
              "      <th>Age</th>\n",
              "      <th>Salary</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>France</td>\n",
              "      <td>44.000000</td>\n",
              "      <td>72000.000000</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>Spain</td>\n",
              "      <td>27.000000</td>\n",
              "      <td>48000.000000</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2</th>\n",
              "      <td>Germany</td>\n",
              "      <td>30.000000</td>\n",
              "      <td>54000.000000</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>3</th>\n",
              "      <td>Spain</td>\n",
              "      <td>38.000000</td>\n",
              "      <td>61000.000000</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>4</th>\n",
              "      <td>Germany</td>\n",
              "      <td>40.000000</td>\n",
              "      <td>63777.777778</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>5</th>\n",
              "      <td>France</td>\n",
              "      <td>35.000000</td>\n",
              "      <td>58000.000000</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>6</th>\n",
              "      <td>Spain</td>\n",
              "      <td>38.777778</td>\n",
              "      <td>52000.000000</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>7</th>\n",
              "      <td>France</td>\n",
              "      <td>48.000000</td>\n",
              "      <td>79000.000000</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>8</th>\n",
              "      <td>Germany</td>\n",
              "      <td>50.000000</td>\n",
              "      <td>83000.000000</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>9</th>\n",
              "      <td>France</td>\n",
              "      <td>37.000000</td>\n",
              "      <td>67000.000000</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>"
            ],
            "text/plain": [
              "   Country        Age        Salary\n",
              "0   France  44.000000  72000.000000\n",
              "1    Spain  27.000000  48000.000000\n",
              "2  Germany  30.000000  54000.000000\n",
              "3    Spain  38.000000  61000.000000\n",
              "4  Germany  40.000000  63777.777778\n",
              "5   France  35.000000  58000.000000\n",
              "6    Spain  38.777778  52000.000000\n",
              "7   France  48.000000  79000.000000\n",
              "8  Germany  50.000000  83000.000000\n",
              "9   France  37.000000  67000.000000"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 21
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "5arKNj7sSOU4"
      },
      "source": [
        "#Encoding Categorical Data"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "0CAd3bs9STVj"
      },
      "source": [
        "from sklearn.preprocessing import LabelEncoder, OneHotEncoder"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "sKtzn6y4VJbt"
      },
      "source": [
        "labeler = LabelEncoder()\r\n",
        "\r\n",
        "labelled_X = labeler.fit_transform(X.iloc[:,0])  "
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "_PH7HFasVsbA",
        "outputId": "9f7de1bb-a999-4861-b2c5-5ef30ca11f0c"
      },
      "source": [
        "labelled_X"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "array([0, 2, 1, 2, 1, 0, 2, 0, 1, 0])"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 26
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "OpZdiLVEWAuB",
        "outputId": "ed79ddf1-16a4-4f6a-d964-f5dc31c1ef59"
      },
      "source": [
        "labeler.inverse_transform(labelled_X)"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "array(['France', 'Spain', 'Germany', 'Spain', 'Germany', 'France',\n",
              "       'Spain', 'France', 'Germany', 'France'], dtype=object)"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 27
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "2YBzQPPDWjnM"
      },
      "source": [
        "# ohe = OneHotEncoder()\r\n",
        "\r\n",
        "# ohe_labelled = ohe.fit_transform(X[['Country']].values)"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "RyPsS_AsXHEP"
      },
      "source": [
        "#"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "ABlJBvNoXQ2W"
      },
      "source": [
        "# onehotencoder = OneHotEncoder(categorical_features = [0])\r\n",
        "# # X = onehotencoder.fit_transform(X).toarray()\r\n",
        "# # labelencoder_Y = LabelEncoder()\r\n",
        "# # Y =  labelencoder_Y.fit_transform(Y)"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "aK5KqWImZaMe",
        "outputId": "889b28ec-1b00-45fe-c78a-5776b8762c0d"
      },
      "source": [
        "drop_enc = OneHotEncoder(drop='first').fit(X.iloc[:,[0]])\r\n",
        "drop_enc.categories_"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "[array(['France', 'Germany', 'Spain'], dtype=object)]"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 46
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "rnuSk7Kghywg"
      },
      "source": [
        "ohe_labelled = drop_enc.transform(X.iloc[:,[0]]).toarray()"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "b7YU4pWhieFk",
        "outputId": "76015b57-cc2b-4e54-f4a8-a0027fe8d4b6"
      },
      "source": [
        "ohe_labelled"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "array([[0., 0.],\n",
              "       [0., 1.],\n",
              "       [1., 0.],\n",
              "       [0., 1.],\n",
              "       [1., 0.],\n",
              "       [0., 0.],\n",
              "       [0., 1.],\n",
              "       [0., 0.],\n",
              "       [1., 0.],\n",
              "       [0., 0.]])"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 53
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "1FQD83DlifoB"
      },
      "source": [
        "# c = ['a','b','a','c','a','b']"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "gf_ZkctJkpzi"
      },
      "source": [
        "# ohe = OneHotEncoder()"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "YGXkueWxks3c"
      },
      "source": [
        "# ohe_fit = ohe.fit([c])"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "mGwvwh_WmFNE"
      },
      "source": [
        ""
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "WRKdp08ckvxa"
      },
      "source": [
        "# ohe_transformed = ohe.transform([c]).toarray()"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "go6UW4PflIWU"
      },
      "source": [
        "# ohe_transformed"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "7sFgB0PPOw0X"
      },
      "source": [
        "# Complete Encoding Example"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "q17W5YU8lKKv"
      },
      "source": [
        "df = pd.DataFrame({'Fruits':['Apple','Banana','Orange','Apple','Banana','Banana','Orange']})"
      ],
      "execution_count": 12,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 254
        },
        "id": "CUds0ndrPBMg",
        "outputId": "375889d3-c51b-49f2-9776-1e3c61d801cf"
      },
      "source": [
        "df"
      ],
      "execution_count": 13,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/html": [
              "<div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>Fruits</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>Apple</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>Banana</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2</th>\n",
              "      <td>Orange</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>3</th>\n",
              "      <td>Apple</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>4</th>\n",
              "      <td>Banana</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>5</th>\n",
              "      <td>Banana</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>6</th>\n",
              "      <td>Orange</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>"
            ],
            "text/plain": [
              "   Fruits\n",
              "0   Apple\n",
              "1  Banana\n",
              "2  Orange\n",
              "3   Apple\n",
              "4  Banana\n",
              "5  Banana\n",
              "6  Orange"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 13
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "Shg4_OK6Paxy"
      },
      "source": [
        "#First Label Encoding. \r\n",
        "from sklearn.preprocessing import LabelEncoder"
      ],
      "execution_count": 14,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "mdIIh8tUPhDf"
      },
      "source": [
        "le = LabelEncoder()"
      ],
      "execution_count": 15,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "n0Rdhb_HPjEq"
      },
      "source": [
        "labelencoded = le.fit_transform(df.iloc[:,0])"
      ],
      "execution_count": 16,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "wEaYlNzNPnf6",
        "outputId": "90caf6cc-75ec-46d2-89a6-ea4bdae4cf25"
      },
      "source": [
        "labelencoded"
      ],
      "execution_count": 17,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "array([0, 1, 2, 0, 1, 1, 2])"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 17
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "qisHAsQDP1IX"
      },
      "source": [
        "df['Fruits'] = labelencoded"
      ],
      "execution_count": 18,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 254
        },
        "id": "IMNgojX9P-D5",
        "outputId": "7162ecd4-83e2-405c-8139-f7941874d2ba"
      },
      "source": [
        "df"
      ],
      "execution_count": 19,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/html": [
              "<div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>Fruits</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2</th>\n",
              "      <td>2</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>3</th>\n",
              "      <td>0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>4</th>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>5</th>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>6</th>\n",
              "      <td>2</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>"
            ],
            "text/plain": [
              "   Fruits\n",
              "0       0\n",
              "1       1\n",
              "2       2\n",
              "3       0\n",
              "4       1\n",
              "5       1\n",
              "6       2"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 19
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "_IMMA3yjP-y3"
      },
      "source": [
        "#Now onehot encoding. "
      ],
      "execution_count": 20,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "b8QJ5N7IQEGY"
      },
      "source": [
        "from sklearn.preprocessing import OneHotEncoder\r\n",
        "from sklearn.compose import ColumnTransformer"
      ],
      "execution_count": 21,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "VhHQJFvUQNGp"
      },
      "source": [
        "ohe = OneHotEncoder()"
      ],
      "execution_count": 22,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "u1gOPeh5QicH"
      },
      "source": [
        "ohe_encoded = ohe.fit_transform(df).toarray()"
      ],
      "execution_count": 23,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "0upj1QkOQqTl"
      },
      "source": [
        "df = pd.concat([df,pd.DataFrame(ohe_encoded)],axis=1)"
      ],
      "execution_count": 26,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 254
        },
        "id": "kEjXXD1ZQrrm",
        "outputId": "c8163fbc-cca2-4d43-9463-82a259498fa3"
      },
      "source": [
        "df"
      ],
      "execution_count": 27,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/html": [
              "<div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>Fruits</th>\n",
              "      <th>0</th>\n",
              "      <th>1</th>\n",
              "      <th>2</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>0</td>\n",
              "      <td>1.0</td>\n",
              "      <td>0.0</td>\n",
              "      <td>0.0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>1</td>\n",
              "      <td>0.0</td>\n",
              "      <td>1.0</td>\n",
              "      <td>0.0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2</th>\n",
              "      <td>2</td>\n",
              "      <td>0.0</td>\n",
              "      <td>0.0</td>\n",
              "      <td>1.0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>3</th>\n",
              "      <td>0</td>\n",
              "      <td>1.0</td>\n",
              "      <td>0.0</td>\n",
              "      <td>0.0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>4</th>\n",
              "      <td>1</td>\n",
              "      <td>0.0</td>\n",
              "      <td>1.0</td>\n",
              "      <td>0.0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>5</th>\n",
              "      <td>1</td>\n",
              "      <td>0.0</td>\n",
              "      <td>1.0</td>\n",
              "      <td>0.0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>6</th>\n",
              "      <td>2</td>\n",
              "      <td>0.0</td>\n",
              "      <td>0.0</td>\n",
              "      <td>1.0</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>"
            ],
            "text/plain": [
              "   Fruits    0    1    2\n",
              "0       0  1.0  0.0  0.0\n",
              "1       1  0.0  1.0  0.0\n",
              "2       2  0.0  0.0  1.0\n",
              "3       0  1.0  0.0  0.0\n",
              "4       1  0.0  1.0  0.0\n",
              "5       1  0.0  1.0  0.0\n",
              "6       2  0.0  0.0  1.0"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 27
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "OESFMv_YQ8d9"
      },
      "source": [
        "# ohe_encoded1 = np.array(ColumnTransformer.fit_transform()"
      ],
      "execution_count": 31,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "btyFSsMrRGsB"
      },
      "source": [
        ""
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "AGVn7K0JRkk3"
      },
      "source": [
        ""
      ]
    }
  ]
}